This document presents the subset of the figures used for paper about monitoring.

Data and packages

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scatterpie)
library(rgdal)
library(multcompView)
library(car)
library(ggpmisc)
library(chisq.posthoc.test)
library(vcd)

Sampling data correspond to the data collected with kobo and previously cleaned with the script 1_preprocesamiento_datos_kobo.Rmd.

# load data
muestreo_tidy<-read.delim("../data/kobo/muestreo_dic2020_tidy.txt", header = TRUE)
parcelas_tidy<-read.delim("../data/kobo/parcelas_dic2020_tidy.txt", header = TRUE)

# pivot long parcelas data to have health data as a single variable
parcelas_long<-pivot_longer(parcelas_tidy, 
                            cols = healthy:worm, 
                            names_to = "tree_health_simplified",
                            values_to = "n_trees")

Data analyzed here correspond only to the trees that were approved during the validation by manually reviewing the photographs in kobotoolbox. Total of 1778 trees sampled, 1765 were approved in the validation.

muestreo_tidy<- filter(muestreo_tidy, X_validation_status=="validation_status_approved")

Color palettes:

# Make a nice color pallete and legend order for all plots

my_cols=c("darkgreen", 
              "darkred", 
              "orangered1", 
              "cadetblue", 
              "tan", 
              "beige", 
            #  "burlywood4", 
              "coral", 
              "aquamarine3", 
              "gray70", 
              "black")

desired_order=c("healthy", 
                "ozone", 
                "ozone_and_other", 
                "others_combined", 
                "drougth", 
                "fungi", 
             #   "insect", 
                "worm", 
                "acid_rain", 
                "other", 
                "dead")

desired_names=c("healthy", 
                "ozone", 
                "ozone and other", 
                "others combined", 
                "drougth", 
                "fungi", 
             #   "insect", 
                "worm", 
                "acid rain", 
                "other", 
                "dead")

# For ozone damage percentage 
 my_cols2<-c("darkgreen", "gold2", "chocolate1", "orangered", "red4", "darkorchid4")
 
desired_order_percentage<-c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%", "more than 70%")

Multiplot fun:

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Configure google api for maps:

# code adapted from https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-g0ogle-map.html

## configure google api

# You first need to register your api key in https://cloud.google.com/maps-platform/#get-started and follow instructions. The geocoding API is a free service, but you nevertheless need to associate a credit card with the account. Please note that the Google Maps API is not a free service. There is a free allowance of 40,000 calls to the geocoding API per month, and beyond that calls are $0.005 each.
# after you obtain your api, save it in /scripts/api_key.api (not shown in this repo por obvious reasons).

# if you get the following error when running get_map():

#"Error in aperm.default(map, c(2, 1, 3)) : 
#  invalid first argument, must be an array " 

# check this troubleshooting: https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-google-map.html

##  load and register api
api <- readLines("api_key.api")
register_google(key = api)

Map and monitoring figures presented in the paper:

Figure 2

Plot 2a: PNDL location on CDMX map

# get cdmx shape
CDMX<-readOGR(dsn="../data/spatial", layer="CDMX")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "CDMX"
## with 1 features
## It has 8 fields
CDMX<-fortify(CDMX)

# get PNDL shape
PNDL<-readOGR(dsn="../data/spatial", layer="Desierto_Leones_Geo_ITRF08")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum International_Terrestrial_Reference_Frame_2008 in
## Proj4 definition: +proj=longlat +ellps=GRS80 +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "Desierto_Leones_Geo_ITRF08"
## with 1 features
## It has 14 fields
PNDL<-fortify(PNDL)

# get background map
sat_map = get_map(location = c(lon = -99.133549, lat = 19.3), zoom = 10, maptype = 'terrain-background', source = "google")

## plot
p_a<-ggmap(sat_map) + 
            geom_polygon(data = CDMX,
                         aes(x = long, y = lat, group = group),
                         color="black", fill=NA, size=1.5) +
            geom_polygon(data = PNDL,
                         aes(x = long, y = lat, group = group),
                         color="red", fill=NA, size=1.5) +
            geom_point(aes(x=-98.95, y=19.6), 
                       shape=0, stroke=2, size=5, color="black") +
            geom_point(aes(x=-98.95, y=19.55), 
                       shape=0, stroke=2, size=5, color="red") +
            geom_text(aes(label="CDMX", x=-98.87, y=19.6), 
                      color="Black", fontface="bold", size=5) +
            geom_text(aes(label="PNDL", x=-98.87, y=19.55), 
                      color="Black", fontface="bold", size=5) +
            theme(text = element_text(size = 20))+
  ggtitle("a)")

Plot 2b: Satellite image and surroundings of the PNDL

# get background map
sat_map = get_map(location = c(lon = -99.30, lat = 19.31), zoom = 13, maptype = 'satellite', source = "google")

## add towns names
towns<-data.frame(nombre=c("San Bartolo Ameyalco", 
                           "Santa Rosa Xochiac", 
                           "San Mateo Tlaltenango"),
                  long=c(-99.270, -99.29, -99.276),
                  lat=c(19.333, 19.325, 19.346))



## plot
p_b<-ggmap(sat_map) + 
            geom_polygon(data = PNDL,
                         aes(x = long, y = lat, group = group),
                         color="red", fill=NA, size=1.5) +
            geom_point(data=towns, aes(x=long, y=lat), colour="red", size=1.5) +
            geom_text(data=towns, aes(label=nombre, x=long, y=lat), 
                      color="white", fontface="bold",
                      size=5, nudge_y=0.003) +
  # add Cruz de Coloxtitla (CX), and Convento (Cn) landmarks
            geom_text(aes(label="X", x=-99.3014, y=19.286068), 
                      color="white", fontface="bold", size=4) +
            geom_text(aes(label="C", x=-99.31, y=19.3133), 
                      color="white", fontface="bold", size=4) +
            theme(text = element_text(size = 20))+
  ggtitle("b)")

Plot 2c: This is the distribution of the 48 plots:

## plot map
# get map
sat_map = get_map(location = c(lon = -99.3060, lat = 19.2909), zoom = 14, maptype = 'satellite', source = "google")

# plot sampled plots
p_c <-  ggmap(sat_map)
p_c <- p_c + geom_point(data=parcelas_tidy,
                      aes(x=X_coordinates_longitude,
                          y=X_coordinates_latitude),
                      color="red") +
          geom_text(data=parcelas_tidy,
                      aes(x=X_coordinates_longitude,
                          y=X_coordinates_latitude,
                          label=plot),
                      color="white",
                     check_overlap = TRUE,
                      hjust = 0, vjust=1, nudge_x = 0.0005,
                 size= 5) +
    theme(text = element_text(size = 20))+
  ggtitle("c)")

Plot 2d: Distribution of tree health status by plot

The following figure shows the total number of trees sampled in each 10x10 m plot, and how many of these are under some category of damage:

p_d <- ggplot(parcelas_long, aes(x=plot, y=n_trees,     fill=tree_health_simplified)) +
  geom_bar(stat="identity") +
  scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") 
  

p_d <- p_d + theme_bw() +
  labs(x="Plots", y= "Number of trees") +
  theme(text = element_text(size = 20)) +
  ggtitle("d)")

Multiplot

multiplot(p_a, p_c, p_b, p_d, cols=2)

Figure 3

el estado de salud del aárbol es independiente de su condición de reforestación H:árbolres dañados por ozono dependen de si fueron reforestados

Figure 3a y b Reforested

# Select tree reforested and covered data 
cont_tab<- select(muestreo_tidy, contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified == "healthy"| tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other")

table(cont_tab)
## , , tree_exposition = cover
## 
##                       reforested
## tree_health_simplified  no yes
##        healthy         247  75
##        ozone            42  20
##        ozone_and_other 278  41
## 
## , , tree_exposition = exposed
## 
##                       reforested
## tree_health_simplified  no yes
##        healthy         136  31
##        ozone            52  11
##        ozone_and_other 157  23
tab_reforested<- as.table(array(c(sum(with(cont_tab, tree_health_simplified == "healthy" & reforested == "yes")),
                              sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "yes")),
                              sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "yes")),
               sum(with(cont_tab, tree_health_simplified == "healthy" & reforested == "no")),
                              sum(with(cont_tab, tree_health_simplified == "ozone" & reforested == "no")),
                              sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & reforested == "no"))),
             dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("yes","no"))))

# Pass data matrix to chisq.posthoc.test function
names(attributes(tab_reforested)$dimnames) <- c("healthy status", "reforested")

# Barplot
p_3a<-ggplot(cont_tab, aes(reforested, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_fill_manual(name ="Health status", values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),
                    labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ ggtitle("a)")+ theme(legend.title.align = 0.5)+ theme(text = element_text(size = 20))+
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  labs(y="Number of trees", x= "Reforested")
  
# Mosaic Plot with vcd library
p_3b<-mosaic(tab_reforested, shade=TRUE, legend=TRUE,
                labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Pruebas de otros mosaicos
## OPCION 1
library("graphics")
mosaicplot(tab_reforested, shade = TRUE, las=2,
           main = "housetasks")

## OPCION 2
# install.packages("vcd")
library("vcd")
# plot just a subset of the table
assoc(head(tab_reforested, 5), shade = TRUE, las=2)

# Chi2
chisq <- chisq.test(tab_reforested)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_reforested
## X-squared = 17.399, df = 2, p-value = 0.0001666
# Observed counts
chisq$observed
##                  reforested
## healthy status    yes  no
##   healthy         106 383
##   ozone            31  94
##   ozone and other  64 435
# Expected counts
round(chisq$expected,2)
##                  reforested
## healthy status      yes     no
##   healthy         88.31 400.69
##   ozone           22.57 102.43
##   ozone and other 90.12 408.88
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  reforested
## healthy status       yes     no
##   healthy          1.882 -0.884
##   ozone            1.773 -0.833
##   ozone and other -2.751  1.292
# Visuaalize Pearson residuals
library(corrplot)
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  reforested
## healthy status       yes     no
##   healthy         20.366  4.489
##   ozone           18.075  3.984
##   ozone and other 43.499  9.587
# Visualize the contribution
corrplot(contrib, is.cor = FALSE)

Figure 3c y d Covered

tab_covered<- as.table(array(c(sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "cover")),
                sum(with(cont_tab, tree_health_simplified == "healthy" & tree_exposition == "exposed")),
                sum(with(cont_tab, tree_health_simplified == "ozone" & tree_exposition == "exposed")),
                sum(with(cont_tab, tree_health_simplified == "ozone_and_other" & tree_exposition == "exposed"))),
             dim=c(3,2), dimnames=list( c("healthy","ozone","ozone and other"), c("yes","no"))))

# Pass data matrix to chisq.posthoc.test function
names(attributes(tab_covered)$dimnames) <- c("healthy status", "covered")

#Barplot
p_3c<-ggplot(cont_tab, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge")+
  scale_fill_manual(name ="Health status", 
                    values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"), 
                    labels= c("healthy", "ozone","ozone and other"))+
  theme_bw()+ ggtitle("c)")+theme(legend.title.align = 0.5)+theme(text = element_text(size = 20))+ 
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  labs(y="Number of trees", x= "Tree exposition")

# Mosaic Plot with vcd library
p_3d <- mosaic(tab_covered, shade=TRUE, legend=TRUE, labeling_args=list(rot_labels=c(bottom=90,top=0),gp_labels=(gpar(fontsize=12))))

# Pruebas de otros mosaicos
## OPCION 1
mosaicplot(tab_covered, shade = TRUE, las=2,
           main = "housetasks")

## OPCION 2
# plot just a subset of the table
assoc(head(tab_covered, 5), shade = TRUE, las=2)

# Chi2
chisq <- chisq.test(tab_covered)
chisq
## 
##  Pearson's Chi-squared test
## 
## data:  tab_covered
## X-squared = 11.524, df = 2, p-value = 0.003145
# Observed counts
chisq$observed
##                  covered
## healthy status    yes  no
##   healthy         322 167
##   ozone            62  63
##   ozone and other 319 180
# Expected counts
round(chisq$expected,2)
##                  covered
## healthy status       yes     no
##   healthy         308.87 180.13
##   ozone            78.95  46.05
##   ozone and other 315.18 183.82
# Pearson residuals (residuos estandarizados)
round(chisq$residuals, 3)
##                  covered
## healthy status       yes     no
##   healthy          0.747 -0.979
##   ozone           -1.908  2.498
##   ozone and other  0.215 -0.282
# Visuaalize Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)

# Contibution in percentage (%)
contrib <- 100*chisq$residuals^2/chisq$statistic
round(contrib, 3)
##                  covered
## healthy status       yes     no
##   healthy          4.847  8.311
##   ozone           31.589 54.163
##   ozone and other  0.401  0.688
# Visualize the contribution
corrplot(contrib, is.cor = FALSE)

Multiplot

multiplot(p_3a, p_3c, p_3b, p_3d, cols=2)

##                 reforested yes  no
## healthy status                    
## healthy                    106 383
## ozone                       31  94
## ozone and other             64 435
##                 covered yes  no
## healthy status                 
## healthy                 322 167
## ozone                    62  63
## ozone and other         319 180

Figure 4

Plot 4a

p <- filter(muestreo_tidy, tree_heigth<15, tree_nodes>0) %>% 
     ggplot(.) +
     scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") +
theme_bw()

p4_a <- p + geom_histogram(aes(x=tree_nodes, 
                      fill=tree_health_simplified))  +
    labs(x="Tree age (years)", y= "Number of trees") +
    theme(text = element_text(size = 20)) +
     theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  ggtitle("a)")
p4_a

Plot 4b

## base data
# Definir plantas sanas y dañadas por otra cosa que no fuera ozono
# cond_PO<- se  refiere a condition Percentage damage by Ozone 
cond_PO<-as_data_frame(muestreo_tidy)
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Asignar 0% de daño por ozono a los árboles healthy
cond_PO$ozone_damage_percentage = ifelse(cond_PO$tree_health == "healthy", "0%", cond_PO$ozone_damage_percentage)

# Filtrar por porcentaje de daño
condition_PO<-cond_PO%>%
  filter(ozone_damage_percentage == "0%" | ozone_damage_percentage == "less than 10%" | ozone_damage_percentage == "10 to 40%" | ozone_damage_percentage == "40 to 50%"| ozone_damage_percentage == "50 to 70%" | ozone_damage_percentage == "more than 70%")

condition_PO$ozone_damage_percentage <- as.factor(condition_PO$ozone_damage_percentage)


# Plot
p_od<- condition_PO %>% filter(!is.na(ozone_damage_percentage)) %>%
            ggplot() +
            scale_fill_manual(values= my_cols2, 
                              breaks = desired_order_percentage,
                              labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
                                         "50 to 70%", "more 70%"),
                              name= "Ozone damage\n per tree") +
            theme_bw() + theme(text = element_text(size = 20)) 

p4_b <- p_od +
  geom_bar(aes(x=tree_nodes,
               fill=ozone_damage_percentage)) +
  labs(x="Tree age (years)", y= "Number of trees") +
  theme(legend.title.align = 0.5)+
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  ggtitle("b)")

p4_b
## Warning: Removed 147 rows containing non-finite values (stat_count).

Plot 4c

# Filtrar por categoría de daño
condition_HOO<-muestreo_tidy%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )

condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)


# Data distribution

# Los datos tienen a graficar es el número de nodos para cada categoria de salud.
# Los datos son continuos discretos, por lo tanto el analisis a seguir para buscar diferencias entre los grupos son:

shapiro.test(condition_HOO$tree_nodes) #NO HAY NORMALIDAD 
## 
##  Shapiro-Wilk normality test
## 
## data:  condition_HOO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer una prueba no paramétrica (kruskal)

# Debe tener Homocedasticidad: la varianza debe de ser constante entre todos los grupos.
# If the p-value is less than our chosen significance level, we can reject the null hypothesis and conclude that we have enough evidence to state that the variance among the groups is not equal.
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #Opera con medias
# As the p value obtained from the Shapiro-Wilk test and Levene’s test is significant (p < 0.05), we conclude that the data is not normally distributed and does not have equal variance. Kruskal-Wallis test is more appropriate for analyzing differences.

kruskal.test(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO) #opera con mediana
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by tree_health_simplified
## Kruskal-Wallis chi-squared = 265.57, df = 2, p-value < 2.2e-16
#As the p value obtained from the Kruskal-Wallis test test is significant , we conclude that there are significant differences.

# For the Kruskal-Wallis test, epsilon-squared is a method of choice for effect size measurement. The epsilon-squared is 0.74 and suggests a very strong effect of plant varieties on yield

# calculate effect size. Hay efecto relativamente fuerte
library(rcompanion)
epsilonSquared(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified)
## epsilon.squared 
##           0.239
# https://peterstatistics.com/CrashCourse/3-TwoVarUnpair/NomOrd/NomOrd3c.html
# To know which plant varieties are significantly different from each other, we will perform the Dunn’s test as post-hoc test for significant Kruskal-Wallis test. As there are multiple comparisons, we will correct the p values using Benjamini-Hochberg FDR method for multiple hypothesis testing at a 5% cut-off. The other tests that can be used for post-hoc test includes Conover and Nemenyi tests. Dunn’s test is more appropriate post-hoc than the Mann-Whitney U test for significant Kruskal-Wallis test as it retains the rank sums of the Kruskal-Wallis.

#poshoc para saber que grupos difieren
pairwise.wilcox.test(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified, p.adjust.method = "holm" )
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_HOO$tree_nodes) and condition_HOO$tree_health_simplified 
## 
##                 healthy ozone
## ozone           <2e-16  -    
## ozone_and_other <2e-16  0.012
## 
## P value adjustment method: holm
# Perorm pairwise comparisons
library(ggpubr)
my_comparisons <- list( c("healthy", "ozone"), c("healthy", "ozone_and_other"), c("ozone", "ozone_and_other") )


p4_c<-condition_HOO%>%
   ggplot(aes(y= tree_nodes, x= tree_health_simplified))+
          geom_boxplot(color="grey", notch = F)+
        scale_color_manual(values=  my_cols, labels= desired_names,
                    name= "Health status")+
        geom_point(position="jitter",aes(color = tree_health_simplified), alpha=0.5, size= 2.5)+
        xlab("")+ ylab("Tree age (years)")+
  theme_bw()+
  ggtitle("c)")+
  theme(text = element_text(size = 20), axis.text.x=element_blank())+
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
p4_c
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).

Plot 4d

# Perorm pairwise comparisons
# Run Shapiro-Wilk test
shapiro.test(condition_PO$tree_nodes) #NO HAY NORMALIDAD
## 
##  Shapiro-Wilk normality test
## 
## data:  condition_PO$tree_nodes
## W = 0.94294, p-value < 2.2e-16
# Procedo a hacer un kruskal
# Debe tener homogeneidad. Si es mayor a 0.05 No hay evidencias en contra de la homogeneidad de varianzas. 
leveneTest(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO, center = "median")
kruskal.test(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_PO)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by ozone_damage_percentage
## Kruskal-Wallis chi-squared = 283.95, df = 5, p-value < 2.2e-16
epsilonSquared(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage)
## epsilon.squared 
##           0.255
#poshoc que grupos difieren

#order levels 
condition_PO$ozone_damage_percentage<- ordered(condition_PO$ozone_damage_percentage, levels=c("0%","less than 10%", "10 to 40%", "40 to 50%", "50 to 70%","more than 70%"))

#Prueba
pairwise.wilcox.test(x = sqrt(condition_PO$tree_nodes), g = condition_PO$ozone_damage_percentage, p.adjust.method = "bonferroni" )
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_PO$tree_nodes) and condition_PO$ozone_damage_percentage 
## 
##               0%      less than 10% 10 to 40% 40 to 50% 50 to 70%
## less than 10% < 2e-16 -             -         -         -        
## 10 to 40%     < 2e-16 0.6498        -         -         -        
## 40 to 50%     < 2e-16 0.2086        1.0000    -         -        
## 50 to 70%     < 2e-16 2.9e-06       0.0026    0.5273    -        
## more than 70% 1.7e-05 1.0000        1.0000    1.0000    0.2060   
## 
## P value adjustment method: bonferroni
my_comparisons <- list(c("0%", "less than 10%"), c("0%", "10 to 40%"), c("0%", "40 to 50%"),
                        c("0%", "50 to 70%"), c("0%", "more than 70%"),
                        c("less than 10%", "50 to 70%"), 
                       c("10 to 40%", "50 to 70%"))

# Plot
p4_d<-condition_PO%>% filter(!is.na(ozone_damage_percentage)) %>% 
   ggplot(aes(y= tree_nodes, x= ozone_damage_percentage))+
          geom_boxplot(color="grey", notch = F)+
        scale_color_manual(values=  my_cols2,labels = c("0%","less 10%", "10 to 40%", "40 to 50%",
                                         "50 to 70%", "more 70%"))+
        geom_point(position="jitter",aes(color = ozone_damage_percentage), alpha=0.5, size= 2.5)+
        xlab("")+ ylab("Tree age (years)")+
  labs(color = "Ozone damage\n per tree")+
  theme_bw()+
  ggtitle("d)")+
  theme(legend.title.align = 0.5)+
  theme(text = element_text(size = 20), axis.text.x=element_blank())+
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, label="p.signif")

p4_d
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).

Multiplot

multiplot(p4_a, p4_c, p4_b, p4_d, cols=2)
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).
## Warning: Removed 147 rows containing non-finite values (stat_count).
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing non-finite values (stat_signif).
## Warning: Removed 147 rows containing missing values (geom_point).

Figure 5

# Filtrar por categoría de daño
condition_HOO<-muestreo_tidy%>%
  filter(tree_health_simplified == "healthy" | tree_health_simplified == "ozone" | tree_health_simplified == "ozone_and_other" )

condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)

# Modelo 3  - Edad, salud y estructura espacial afectan crecimiento
glm3<-glm(log10(tree_heigth) ~ tree_nodes*tree_health_simplified + tree_exposition + reforested, data = condition_HOO)

# Ho= hay normalidad, si pvalues es mayor a 0.05 se acepta la Ho por lo tanto pvalue mayor a 0.05 hay normalidad 
shapiro.test(glm3$residuals) # Normalidad ( Datos normales si es mayor a 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  glm3$residuals
## W = 0.99778, p-value = 0.2238
cor.test(abs(glm3$residuals), glm3$fitted.values) #Homocedasticidad: Valor no significativa sig que si hay homocedasticidad
## 
##  Pearson's product-moment correlation
## 
## data:  abs(glm3$residuals) and glm3$fitted.values
## t = 1.4722, df = 964, p-value = 0.1413
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0157601  0.1101083
## sample estimates:
##        cor 
## 0.04736209
par(mfrow =c(2,2))
plot(glm3)

summary(glm3)
## 
## Call:
## glm(formula = log10(tree_heigth) ~ tree_nodes * tree_health_simplified + 
##     tree_exposition + reforested, data = condition_HOO)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.94431  -0.19691   0.01762   0.18800   0.84646  
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                      -0.694392   0.033429 -20.772
## tree_nodes                                        0.108988   0.004806  22.677
## tree_health_simplifiedozone                       0.218104   0.096575   2.258
## tree_health_simplifiedozone_and_other             0.426346   0.053116   8.027
## tree_expositionexposed                            0.069066   0.020785   3.323
## reforestedyes                                     0.032913   0.024310   1.354
## tree_nodes:tree_health_simplifiedozone           -0.013930   0.010673  -1.305
## tree_nodes:tree_health_simplifiedozone_and_other -0.037654   0.006213  -6.060
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## tree_nodes                                        < 2e-16 ***
## tree_health_simplifiedozone                      0.024145 *  
## tree_health_simplifiedozone_and_other            2.92e-15 ***
## tree_expositionexposed                           0.000925 ***
## reforestedyes                                    0.176097    
## tree_nodes:tree_health_simplifiedozone           0.192146    
## tree_nodes:tree_health_simplifiedozone_and_other 1.95e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08797878)
## 
##     Null deviance: 224.146  on 965  degrees of freedom
## Residual deviance:  84.284  on 958  degrees of freedom
##   (147 observations deleted due to missingness)
## AIC: 403.34
## 
## Number of Fisher Scoring iterations: 2
# Graficar modelo 3
Tree_height_plot<-ggplot(condition_HOO, aes(x = tree_nodes, y = log(tree_heigth))) + geom_point(aes(colour=tree_health_simplified), alpha=0.5, size= 2.5) + geom_smooth(method="glm",aes(color= tree_health_simplified), fullrange =T)+
  labs( y = "log(Tree higth)", x = "Tree age (years)", color = "Health status")+
  scale_color_manual(values=  my_cols, labels= desired_names,
                    name= "Health status")+
  theme(text = element_text(size = 20), axis.text.x=element_blank())+
  theme(plot.title = element_text(lineheight=1.1, face="bold"))+
  theme_bw()

Tree_height_plot
## Warning: Removed 147 rows containing non-finite values (stat_smooth).
## Warning: Removed 147 rows containing missing values (geom_point).

Figure S2

# plot pies in map
p_satmap <-  ggmap(sat_map)
p_satmap +geom_scatterpie(data=parcelas_tidy,
                aes(x=X_coordinates_longitude,
                    y=X_coordinates_latitude,
                    group=plot),
                pie_scale = 1.5,
                cols=desired_order,
                color=NA,
                alpha=1)  +
  scale_fill_manual(values= my_cols, breaks = desired_order,
                    labels= desired_names,
                    name= "Health status") +
  theme(text = element_text(size = 20))

Figure S3

# Create new variable with porcentage of ozone damage
parcelas_tidy<-parcelas_tidy %>% rowwise() %>% 
                     mutate(., 
                      total=sum(healthy,ozone,ozone_and_other,
                          drougth, acid_rain, other,
                          others_combined, dead, fungi,
                          # insect, 
                          worm)) %>%
                    mutate(perc.ozone= sum(ozone, ozone_and_other)/total)

#plot
p <- ggplot(parcelas_tidy) +
     geom_point(aes(x=X_coordinates_altitude,
             y=perc.ozone))



p<- ggplot(parcelas_tidy, aes(X_coordinates_altitude, perc.ozone ))+
  geom_point(color= "grey50", size = 3, alpha = 0.6)

p + 
  stat_smooth(color = "skyblue", formula = y ~ x,fill = "skyblue", method = "lm") +
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..adj.rr.label.., sep = '~~~~')),
    formula = y ~ x,  parse = TRUE,
      size = 10, # Tamaño de fuente de la fórmula
             label.x = 0.1, #location, la proporción entre 0-1
      label.y = 0.95)+
  labs(x="Plot altitude", y= "Percentage of ozone damaged trees")+
  theme_bw() +
   theme(plot.title = element_text(lineheight=1.1, face="bold")) +
  theme(text = element_text(size = 20))

Figura S4

RESULTS Participatory monitoring

# No se incluye a M (37) y PP (6), ya que eran colectados por diferentes miembros del team
trees<- c(135,133,130,174,150,139,165,166,133,137,125,105)
rangers<- c("A","B","C","D","E","F","G","H", "I", "J", "K", "L")
mean(trees)
## [1] 141
sd(trees)
## [1] 19.60519

Damage percentage

# Create new variable with porcentage of ozone damage
parcelas_tidy<-parcelas_tidy %>% rowwise() %>% 
                     mutate(., 
                      total=sum(healthy,ozone,ozone_and_other,
                          drougth, acid_rain, other,
                          others_combined, dead, fungi,
                          # insect, 
                          worm)) %>%
                    mutate(perc.other= sum(healthy,
                          drougth, acid_rain, other,
                          others_combined, dead, fungi,worm)/total)

status_HS<- c("healthy","ozone","ozone_and_other",
                          "drougth", "acid_rain", "other",
                          "others_combined", "dead", "fungi",
                          "worm")
count_HS<-c(sum(parcelas_tidy$healthy), sum(parcelas_tidy$ozone),
            sum(parcelas_tidy$ozone_and_other),sum(parcelas_tidy$drougth),
            sum(parcelas_tidy$acid_rain),sum(parcelas_tidy$other),
            sum(parcelas_tidy$others_combined), sum(parcelas_tidy$dead),
            sum(parcelas_tidy$fungi), sum(parcelas_tidy$worm))

percentageH<-data.frame(status_HS, count_HS)

(sum(c(parcelas_tidy$ozone, parcelas_tidy$ozone_and_other))*100)/sum(count_HS)
## [1] 35.43307
(sum(parcelas_tidy$healthy)*100)/sum(count_HS)
## [1] 27.67154
(sum(c(parcelas_tidy$drougth, parcelas_tidy$acid_rain , parcelas_tidy$other, parcelas_tidy$others_combined,parcelas_tidy$dead, parcelas_tidy$fungi, parcelas_tidy$worm))*100)/sum(count_HS)
## [1] 36.89539

Percentage damage in plots

#Percentage damage plots

PDP<-data.frame(parcelas_tidy$plot, parcelas_tidy$perc.ozone)

PDP<-PDP[order(PDP$parcelas_tidy.plot),]
range(PDP$parcelas_tidy.perc.ozone)
## [1] 0.0000000 0.8181818
head(PDP[order(PDP$parcelas_tidy.perc.ozone),])